ec<-read_delim("Ecoli_AST.tsv.gz") %>% rename(BioSample=`#BioSample`)
length(unique(ec$BioSample))
## [1] 9094
ec %>% group_by(BioSample) %>% summarise(n=length(unique((Antibiotic))))%>% pull(n) %>% summary()
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.00 14.00 15.00 15.33 18.00 36.00
refgenes <- read_delim("refgenes_20241003.tsv")
ec_bin <-read_delim("ecoli_output-full_binaryAMR.csv.gz")
ec_gene_counts <- ec_bin %>% select(-uberstrain, -assembly_barcode) %>% colSums() %>% enframe() %>% rename(gene=name, count=value)
ec_gene_counts <- ec_gene_counts %>% left_join(refgenes, by=join_by("gene"=="#Allele")) %>% filter(`Whitelisted taxa`=="Escherichia" | is.na(`Whitelisted taxa`))
ec_gene_counts_alleleMatch <- ec_gene_counts %>% filter(!is.na(Class))
ec_gene_counts_noAlleleMatch <- ec_gene_counts %>% filter(is.na(Class)) %>% select(gene,count)
ec_gene_counts_geneFamilyMatch <- ec_gene_counts_noAlleleMatch %>% left_join(refgenes, by=join_by("gene"=="Gene family")) %>%
filter(`Whitelisted taxa`=="Escherichia" | is.na(`Whitelisted taxa`)) %>% group_by(gene) %>% filter(row_number()==1)
ec_gene_counts <- bind_rows(ec_gene_counts_alleleMatch, ec_gene_counts_geneFamilyMatch) %>% relocate(`#Allele`, .after=count)
ec_full <-read_delim("ecoli-output-full.csv.gz") %>%
select(uberstrain, assembly_barcode, sample_accession, secondary_sample_accession, country, amrfinder_db_version, amrfinder_version, Country, Pathovar, `Clermont Type (ClermonTyping)`, `Clermont Type (EzClermont)`)
ec_amr_meta <- left_join(ec_full, ec_bin, by=c("uberstrain", "assembly_barcode"))
sum(unique(ec$BioSample) %in% ec_amr_meta$sample_accession)
## [1] 6987
sum(unique(ec$BioSample) %in% ec_amr_meta$secondary_sample_accession)
## [1] 0
# AST data for strains with matching AMRfinder calls from Enterobase
ec_amrfinder_ast <- ec %>% filter(BioSample %in% ec_full$sample_accession)
# AMRfinder binary matrix from Enterobase, for strains with matching AST data from NCBI
amrfinder_full <- ec_amr_meta %>% filter(sample_accession %in% ec$BioSample)
pmrB_Y358N <- summarise_marker_dist_ecoli(ec_amr_meta=ec_amr_meta, marker="pmrB_Y358N")
glpT_E448K <- summarise_marker_dist_ecoli(ec_amr_meta=ec_amr_meta, marker="glpT_E448K")
(glpT_E448K$clermont_plot + pmrB_Y358N$clermont_plot + plot_layout(axes="collect")) /
(glpT_E448K$pathovar_plot + pmrB_Y358N$pathovar_plot + plot_layout(axes="collect"))
cef_cephalosporin <- solo_marker(ast=ec_amrfinder_ast, antibiotic="ceftriaxone", refgene_subclass="CEPHALOSPORIN", gene_class_list=ec_gene_counts, amr_binary=amrfinder_full)
cef_cephalosporin$solo_stats
## # A tibble: 64 × 8
## gene total category n p se ci.lower ci.upper
## <chr> <int> <chr> <int> <dbl> <dbl> <dbl> <dbl>
## 1 ampC_C-11T 2 resistant 0 0 0 0 0
## 2 ampC_C-42T 28 resistant 1 0.0357 0.0351 0 0.104
## 3 ampC_T-32A 50 resistant 4 0.08 0.0384 0.00480 0.155
## 4 blaCMY 6 resistant 3 0.5 0.204 0.0999 0.900
## 5 blaCMY-17 1 resistant 1 1 0 1 1
## 6 blaCMY-2 138 resistant 136 0.986 0.0102 0.966 1
## 7 blaCMY-4 4 resistant 3 0.75 0.217 0.326 1
## 8 blaCMY-42 1 resistant 1 1 0 1 1
## 9 blaCMY-6 1 resistant 1 1 0 1 1
## 10 blaCTX-M 3 resistant 2 0.667 0.272 0.133 1
## # ℹ 54 more rows
cef_cephalosporin$combined_plot
fos_fosfomycin <- solo_marker(ast=ec_amrfinder_ast, antibiotic="fosfomycin", refgene_subclass="FOSFOMYCIN", gene_class_list=ec_gene_counts, amr_binary=amrfinder_full)
fos_fosfomycin$solo_stats
## # A tibble: 4 × 8
## gene total category n p se ci.lower ci.upper
## <chr> <int> <chr> <int> <dbl> <dbl> <dbl> <dbl>
## 1 glpT_E448K 35 resistant 0 0 0 0 0
## 2 uhpT_E350Q 2 resistant 0 0 0 0 0
## 3 glpT_E448K 35 nonsusceptible 0 0 0 0 0
## 4 uhpT_E350Q 2 nonsusceptible 0 0 0 0 0
fos_fosfomycin$combined_plot
colistin <- solo_marker(ast=ec_amrfinder_ast, antibiotic="colistin", refgene_subclass="COLISTIN", gene_class_list=ec_gene_counts, amr_binary=amrfinder_full)
colistin$solo_stats
## # A tibble: 6 × 8
## gene total category n p se ci.lower ci.upper
## <chr> <int> <chr> <int> <dbl> <dbl> <dbl> <dbl>
## 1 mcr-1.1 1 resistant 1 1 0 1 1
## 2 pmrB_E123D 42 resistant 0 0 0 0 0
## 3 pmrB_Y358N 7 resistant 0 0 0 0 0
## 4 mcr-1.1 1 nonsusceptible 1 1 0 1 1
## 5 pmrB_E123D 42 nonsusceptible 40 0.952 0.0329 0.888 1
## 6 pmrB_Y358N 7 nonsusceptible 4 0.571 0.187 0.205 0.938
colistin$combined_plot
cef_cephalosporin <- solo_marker(ast=ec_amrfinder_ast, antibiotic="ceftriaxone", refgene_subclass="CEPHALOSPORIN", gene_class_list=ec_gene_counts, amr_binary=amrfinder_full)
cef_cephalosporin$solo_stats
## # A tibble: 64 × 8
## gene total category n p se ci.lower ci.upper
## <chr> <int> <chr> <int> <dbl> <dbl> <dbl> <dbl>
## 1 ampC_C-11T 2 resistant 0 0 0 0 0
## 2 ampC_C-42T 28 resistant 1 0.0357 0.0351 0 0.104
## 3 ampC_T-32A 50 resistant 4 0.08 0.0384 0.00480 0.155
## 4 blaCMY 6 resistant 3 0.5 0.204 0.0999 0.900
## 5 blaCMY-17 1 resistant 1 1 0 1 1
## 6 blaCMY-2 138 resistant 136 0.986 0.0102 0.966 1
## 7 blaCMY-4 4 resistant 3 0.75 0.217 0.326 1
## 8 blaCMY-42 1 resistant 1 1 0 1 1
## 9 blaCMY-6 1 resistant 1 1 0 1 1
## 10 blaCTX-M 3 resistant 2 0.667 0.272 0.133 1
## # ℹ 54 more rows
cef_cephalosporin$combined_plot
gentamicin_agly <- solo_marker(ast=ec_amrfinder_ast, antibiotic="gentamicin", refgene_class="AMINOGLYCOSIDE", gene_class_list=ec_gene_counts, amr_binary=amrfinder_full)
gentamicin_agly$solo_stats
## # A tibble: 36 × 8
## gene total category n p se ci.lower ci.upper
## <chr> <int> <chr> <int> <dbl> <dbl> <dbl> <dbl>
## 1 16S_A523C 0 resistant 0 NaN NaN NaN NaN
## 2 16S_C1192G 1 resistant 0 0 0 0 0
## 3 aac(2')-IIa 1 resistant 0 0 0 0 0
## 4 aac(3)-IId 41 resistant 41 1 0 1 1
## 5 aac(3)-IIe 3 resistant 3 1 0 1 1
## 6 aac(3)-VIa 2 resistant 0 0 0 0 0
## 7 aac(6')-Ib 1 resistant 0 0 0 0 0
## 8 aac(6')-Ib-cr5 19 resistant 0 0 0 0 0
## 9 aac(6')-Ib4 6 resistant 0 0 0 0 0
## 10 aadA1 145 resistant 3 0.0207 0.0118 0 0.0439
## # ℹ 26 more rows
gentamicin_agly$combined_plot
amikacin_agly <- solo_marker(ast=ec_amrfinder_ast, antibiotic="amikacin", refgene_class="AMINOGLYCOSIDE", gene_class_list=ec_gene_counts, amr_binary=amrfinder_full)
amikacin_agly$solo_stats
## # A tibble: 32 × 8
## gene total category n p se ci.lower ci.upper
## <chr> <int> <chr> <int> <dbl> <dbl> <dbl> <dbl>
## 1 16S_A523C 0 resistant 0 NaN NaN NaN NaN
## 2 aac(2')-IIa 1 resistant 0 0 0 0 0
## 3 aac(3)-IId 27 resistant 0 0 0 0 0
## 4 aac(3)-IIe 3 resistant 0 0 0 0 0
## 5 aac(6')-Ib 1 resistant 1 1 0 1 1
## 6 aac(6')-Ib-cr5 19 resistant 2 0.105 0.0704 0 0.243
## 7 aac(6')-Ib4 5 resistant 0 0 0 0 0
## 8 aadA1 87 resistant 1 0.0115 0.0114 0 0.0339
## 9 aadA2 23 resistant 0 0 0 0 0
## 10 aadA22 3 resistant 0 0 0 0 0
## # ℹ 22 more rows
amikacin_agly$combined_plot
tobramycin_agly <- solo_marker(ast=ec_amrfinder_ast, antibiotic="tobramycin", refgene_class="AMINOGLYCOSIDE", gene_class_list=ec_gene_counts, amr_binary=amrfinder_full)
tobramycin_agly$solo_stats
## # A tibble: 30 × 8
## gene total category n p se ci.lower ci.upper
## <chr> <int> <chr> <int> <dbl> <dbl> <dbl> <dbl>
## 1 aac(2')-IIa 1 resistant 0 0 0 0 0
## 2 aac(3)-IId 27 resistant 9 0.333 0.0907 0.156 0.511
## 3 aac(3)-IIe 3 resistant 0 0 0 0 0
## 4 aac(6')-Ib 1 resistant 1 1 0 1 1
## 5 aac(6')-Ib-cr5 18 resistant 15 0.833 0.0878 0.661 1
## 6 aac(6')-Ib4 5 resistant 2 0.4 0.219 0 0.829
## 7 aadA1 87 resistant 0 0 0 0 0
## 8 aadA2 22 resistant 1 0.0455 0.0444 0 0.132
## 9 aadA22 3 resistant 0 0 0 0 0
## 10 aadA5 66 resistant 2 0.0303 0.0211 0 0.0717
## # ℹ 20 more rows
tobramycin_agly$combined_plot
# solo markers for ampicillin
ampicillin <- solo_marker(ast=ec_amrfinder_ast, antibiotic="ampicillin", refgene_class="BETA-LACTAM", gene_class_list=ec_gene_counts, amr_binary=amrfinder_full)
ampicillin$solo_stats
## # A tibble: 84 × 8
## gene total category n p se ci.lower ci.upper
## <chr> <int> <chr> <int> <dbl> <dbl> <dbl> <dbl>
## 1 ampC_C-11T 2 resistant 0 0 0 0 0
## 2 ampC_C-42T 20 resistant 20 1 0 1 1
## 3 ampC_G-15GG 0 resistant 0 NaN NaN NaN NaN
## 4 ampC_T-32A 26 resistant 26 1 0 1 1
## 5 bla 0 resistant 0 NaN NaN NaN NaN
## 6 blaCARB-2 5 resistant 5 1 0 1 1
## 7 blaCMY 4 resistant 3 0.75 0.217 0.326 1
## 8 blaCMY-17 1 resistant 1 1 0 1 1
## 9 blaCMY-2 101 resistant 100 0.990 0.00985 0.971 1
## 10 blaCMY-4 3 resistant 2 0.667 0.272 0.133 1
## # ℹ 74 more rows
ampicillin$combined_plot
# solo markers for azithromycin
azi <- solo_marker(ast=ec_amrfinder_ast, antibiotic="azithromycin", refgene_subclass="AZITHROMYCIN", gene_class_list=ec_gene_counts, amr_binary=amrfinder_full)
azi$solo_stats
## # A tibble: 4 × 8
## gene total category n p se ci.lower ci.upper
## <chr> <int> <chr> <int> <dbl> <dbl> <dbl> <dbl>
## 1 mef(B) 3 resistant 0 0 0 0 0
## 2 mph(A) 7 resistant 7 1 0 1 1
## 3 mef(B) 3 nonsusceptible 0 0 0 0 0
## 4 mph(A) 7 nonsusceptible 7 1 0 1 1
azi$combined_plot
imipenem <- solo_marker(ast=ec_amrfinder_ast, antibiotic="imipenem", refgene_subclass="CARBAPENEM", gene_class_list=ec_gene_counts, amr_binary=amrfinder_full)
imipenem$solo_stats
## # A tibble: 26 × 8
## gene total category n p se ci.lower ci.upper
## <chr> <int> <chr> <int> <dbl> <dbl> <dbl> <dbl>
## 1 blaKPC-2 2 resistant 2 1 0 1 1
## 2 blaKPC-3 12 resistant 8 0.667 0.136 0.400 0.933
## 3 blaKPC-4 2 resistant 1 0.5 0.354 0 1
## 4 blaNDM-1 8 resistant 8 1 0 1 1
## 5 blaNDM-5 5 resistant 5 1 0 1 1
## 6 blaNDM-6 1 resistant 1 1 0 1 1
## 7 blaNDM-7 3 resistant 3 1 0 1 1
## 8 blaNDM-9 1 resistant 1 1 0 1 1
## 9 blaOXA 1 resistant 0 0 0 0 0
## 10 blaOXA-181 1 resistant 0 0 0 0 0
## # ℹ 16 more rows
imipenem$combined_plot
meropenem <- solo_marker(ast=ec_amrfinder_ast, antibiotic="meropenem", refgene_subclass="CARBAPENEM", gene_class_list=ec_gene_counts, amr_binary=amrfinder_full)
meropenem$solo_stats
## # A tibble: 26 × 8
## gene total category n p se ci.lower ci.upper
## <chr> <int> <chr> <int> <dbl> <dbl> <dbl> <dbl>
## 1 blaKPC-2 2 resistant 1 0.5 0.354 0 1
## 2 blaKPC-3 12 resistant 6 0.5 0.144 0.217 0.783
## 3 blaKPC-4 2 resistant 0 0 0 0 0
## 4 blaNDM-1 10 resistant 10 1 0 1 1
## 5 blaNDM-5 9 resistant 9 1 0 1 1
## 6 blaNDM-6 1 resistant 1 1 0 1 1
## 7 blaNDM-7 3 resistant 3 1 0 1 1
## 8 blaNDM-9 1 resistant 1 1 0 1 1
## 9 blaOXA 1 resistant 0 0 0 0 0
## 10 blaOXA-181 1 resistant 0 0 0 0 0
## # ℹ 16 more rows
meropenem$combined_plot
ceftazidime_cephalosporin <- solo_marker(ast=ec_amrfinder_ast, antibiotic="ceftazidime", refgene_subclass="CEPHALOSPORIN", gene_class_list=ec_gene_counts, amr_binary=amrfinder_full)
ceftazidime_cephalosporin$solo_stats
## # A tibble: 70 × 8
## gene total category n p se ci.lower ci.upper
## <chr> <int> <chr> <int> <dbl> <dbl> <dbl> <dbl>
## 1 ampC_C-11T 5 resistant 0 0 0 0 0
## 2 ampC_C-42T 28 resistant 1 0.0357 0.0351 0 0.104
## 3 ampC_G-15GG 4 resistant 0 0 0 0 0
## 4 ampC_T-14TGT 2 resistant 0 0 0 0 0
## 5 ampC_T-32A 66 resistant 3 0.0455 0.0256 0 0.0957
## 6 blaCMY 4 resistant 2 0.5 0.25 0.0100 0.99
## 7 blaCMY-2 132 resistant 116 0.879 0.0284 0.823 0.934
## 8 blaCMY-4 4 resistant 3 0.75 0.217 0.326 1
## 9 blaCMY-42 3 resistant 3 1 0 1 1
## 10 blaCMY-59 1 resistant 1 1 0 1 1
## # ℹ 60 more rows
ceftazidime_cephalosporin$combined_plot
chloramphenicol <- solo_marker(ast=ec_amrfinder_ast, antibiotic="chloramphenicol", refgene_class="PHENICOL", gene_class_list=ec_gene_counts, amr_binary=amrfinder_full)
chloramphenicol$solo_stats
## # A tibble: 12 × 8
## gene total category n p se ci.lower ci.upper
## <chr> <int> <chr> <int> <dbl> <dbl> <dbl> <dbl>
## 1 catA1 17 resistant 17 1 0 1 1
## 2 catA2 1 resistant 1 1 0 1 1
## 3 catB3 0 resistant 0 NaN NaN NaN NaN
## 4 cmlA1 20 resistant 16 0.8 0.0894 0.625 0.975
## 5 cmlA5 0 resistant 0 NaN NaN NaN NaN
## 6 floR 83 resistant 82 0.988 0.0120 0.964 1
## 7 catA1 17 nonsusceptible 17 1 0 1 1
## 8 catA2 1 nonsusceptible 1 1 0 1 1
## 9 catB3 0 nonsusceptible 0 NaN NaN NaN NaN
## 10 cmlA1 20 nonsusceptible 17 0.85 0.0798 0.694 1
## 11 cmlA5 0 nonsusceptible 0 NaN NaN NaN NaN
## 12 floR 83 nonsusceptible 82 0.988 0.0120 0.964 1
chloramphenicol$combined_plot
trimethoprim <- solo_marker(ast=ec_amrfinder_ast, antibiotic="trimethoprim-sulfamethoxazole", refgene_subclass="TRIMETHOPRIM", gene_class_list=ec_gene_counts, amr_binary=amrfinder_full)
trimethoprim$solo_stats
## # A tibble: 32 × 8
## gene total category n p se ci.lower ci.upper
## <chr> <int> <chr> <int> <dbl> <dbl> <dbl> <dbl>
## 1 dfrA1 141 resistant 111 0.787 0.0345 0.720 0.855
## 2 dfrA12 113 resistant 102 0.903 0.0279 0.848 0.957
## 3 dfrA14 69 resistant 63 0.913 0.0339 0.847 0.980
## 4 dfrA15 0 resistant 0 NaN NaN NaN NaN
## 5 dfrA16 4 resistant 1 0.25 0.217 0 0.674
## 6 dfrA17 475 resistant 449 0.945 0.0104 0.925 0.966
## 7 dfrA19 0 resistant 0 NaN NaN NaN NaN
## 8 dfrA25 2 resistant 2 1 0 1 1
## 9 dfrA27 1 resistant 1 1 0 1 1
## 10 dfrA29 3 resistant 3 1 0 1 1
## # ℹ 22 more rows
trimethoprim$combined_plot
tet <- solo_marker(ast=ec_amrfinder_ast, antibiotic="tetracycline", refgene_class="TETRACYCLINE", gene_class_list=ec_gene_counts, amr_binary=amrfinder_full)
tet$solo_stats
## # A tibble: 14 × 8
## gene total category n p se ci.lower ci.upper
## <chr> <int> <chr> <int> <dbl> <dbl> <dbl> <dbl>
## 1 acrB_R620C 3 resistant 0 0 0 0 0
## 2 tet(A) 1036 resistant 1029 0.993 0.00255 0.988 0.998
## 3 tet(B) 835 resistant 824 0.987 0.00395 0.979 0.995
## 4 tet(C) 48 resistant 16 0.333 0.0680 0.200 0.467
## 5 tet(D) 28 resistant 27 0.964 0.0351 0.896 1
## 6 tet(H) 2 resistant 2 1 0 1 1
## 7 tet(M) 6 resistant 0 0 0 0 0
## 8 acrB_R620C 3 nonsusceptible 0 0 0 0 0
## 9 tet(A) 1036 nonsusceptible 1029 0.993 0.00255 0.988 0.998
## 10 tet(B) 835 nonsusceptible 825 0.988 0.00376 0.981 0.995
## 11 tet(C) 48 nonsusceptible 44 0.917 0.0399 0.838 0.995
## 12 tet(D) 28 nonsusceptible 27 0.964 0.0351 0.896 1
## 13 tet(H) 2 nonsusceptible 2 1 0 1 1
## 14 tet(M) 6 nonsusceptible 0 0 0 0 0
tet$combined_plot
tig <- solo_marker(ast=ec_amrfinder_ast, antibiotic="tigecycline", refgene_subclass="TIGECYCLINE", gene_class_list=ec_gene_counts, amr_binary=amrfinder_full)
tig$solo_stats
## # A tibble: 0 × 8
## # ℹ 8 variables: gene <chr>, total <int>, category <chr>, n <int>, p <dbl>,
## # se <dbl>, ci.lower <dbl>, ci.upper <dbl>
cip <- solo_marker(ast=ec_amrfinder_ast, antibiotic="ciprofloxacin", refgene_subclass="QUINOLONE", gene_class_list=ec_gene_counts, amr_binary=amrfinder_full)
cip$solo_stats
## # A tibble: 46 × 8
## gene total category n p se ci.lower ci.upper
## <chr> <int> <chr> <int> <dbl> <dbl> <dbl> <dbl>
## 1 aac(6')-Ib-cr5 1 resistant 0 0 0 0 0
## 2 gyrA_D87G 4 resistant 0 0 0 0 0
## 3 gyrA_D87N 6 resistant 0 0 0 0 0
## 4 gyrA_D87Y 3 resistant 1 0.333 0.272 0 0.867
## 5 gyrA_S83A 12 resistant 0 0 0 0 0
## 6 gyrA_S83L 122 resistant 18 0.148 0.0321 0.0846 0.210
## 7 gyrA_S83V 1 resistant 0 0 0 0 0
## 8 marR_S3N 536 resistant 4 0.00746 0.00372 0.000177 0.0147
## 9 parC_A56T 7 resistant 0 0 0 0 0
## 10 parC_S57T 41 resistant 0 0 0 0 0
## # ℹ 36 more rows
cip$combined_plot